###############################################################################################
## Anna Weisfeiler, Abdul Ahad Tariq, Derick Bowen
## Importing the Bomb (Kroenig 2009) Replication Project
## April 29, 2010
###############################################################################################
options(repos=c(CRAN="http://software.rc.fas.harvard.edu/mirrors/R/",
CRANextra="http://www.stats.ox.ac.uk/pub/RWin"))

install.packages("xtable")
install.packages("foreign")
install.packages("survival")
install.packages("Matching")
install.packages("rgenoud")
library("xtable")
library("foreign")
library("survival")
library("Matching")
library("rgenoud")

## Imports the data & attaches it as the default dataset
data <- read.dta("nucass_cinc.dta")

## CINC is a probability. Multiplying it by 100 normalizes it so that it is percentage points
cinc100 <-data$cinc*100
data <- cbind(data, cinc100)

############################################ Model 2 (Table 1) ############################################
survival.formula <- function(dataset){
     dataset$risk0 <- dataset$risk1 - 1
     changes <- match(unique(dataset$cowcc), dataset$cowcc)
     dataset$risk0[changes] <- 0
     formula=Surv(dataset$risk0, dataset$risk1, dataset$acquire)
     return(formula)
}

##Gets the uncensored survival formula
uncensored=survival.formula(data)

model.2.org <- coxph(uncensored~nucass+gdpcap+g2+industry+rivalry
                      +allies+polity+openness+chopen5
                      +cluster(cowcc),data, method="breslow")
model.2.org.se <- sqrt(diag(model.2.org$var))

model.2.a <- coxph(uncensored~nucass+gdpcap+g2+industry+rivalry
                      +allies+polity+openness+chopen5+cinc100
                      +cluster(cowcc), data, method="breslow")
model.2.a.se <- sqrt(diag(model.2.a$var))

model.2.b <- coxph(uncensored~nucass+gdpcap+g2+industry+rivalry
                      +allies+polity+openness+chopen5
                      +cinc100+nucass:cinc100
                      +cluster(cowcc), data, method="breslow")
model.2.b.se <- sqrt(diag(model.2.b$var))



############################################ Model 4 (Table 2) ############################################
## Censored Model
data<- data[data$program!=0,]

##Gets the uncensored survival formula
censored <- survival.formula(data.program)

model.4.org <- coxph(censored~nucass+gdpcap+g2+industry+rivalry
                      +allies+polity+openness+chopen5
                      +cluster(cowcc),data, method="breslow")
model.4.org.se <- sqrt(diag(model.4.org$var))

model.4.a <- coxph(censored~nucass+gdpcap+g2+industry+rivalry
                      +allies+polity+openness+chopen5+cinc100
                      +cluster(cowcc), data, method="breslow")
model.4.a.se <- sqrt(diag(model.4.a$var))

model.4.b <- coxph(censored~nucass+gdpcap+g2+industry+rivalry
                      +allies+polity+openness+chopen5
                      +cinc100+nucass:cinc100
                      +cluster(cowcc), data, method="breslow")
model.4.b.se <- sqrt(diag(model.4.b$var))


######################################## Table 3 ##############################################
## Percentage change in Hazard ratios for unit change in given explanatory variable
## for the censored and uncensored models

percent.change <- matrix(NA, 4, 6)
colnames(percent.change) <- c("uncensored-original", "uncensored A", "B", "censored-original", "censored A", "B")
rownames(percent.change) <- c("nucass", "industry", "polity", "cinc")

## Call the Hazard Ratio, which is just the coefficient exponentiate    
## Convert to percentage change

## Uncensored (Table 2, Model 2 Original)
percent.change[,1] <-  rbind(((exp(model.2.org$coef[1])-1)*100),((exp(model.2.org$coef[4])-1)*100),
                              ((exp(model.2.org$coef[7])-1)*100), ((exp(model.2.org$coef[10])-1)*100))

## Uncensored (Table 2, Model 2A)
percent.change[,2] <-  rbind(((exp(model.2.a$coef[1])-1)*100),((exp(model.2.a$coef[4])-1)*100),
                              ((exp(model.2.a$coef[7])-1)*100), ((exp(model.2.a$coef[10])-1)*100))

## Uncensored (Table 2, Model 2B)
percent.change[,3] <-  rbind(((exp(model.2.b$coef[1])-1)*100),((exp(model.2.b$coef[4])-1)*100),
                              ((exp(model.2.b$coef[7])-1)*100), ((exp(model.2.b$coef[10])-1)*100))


## Censored (Table 2, Model 4 Original)
percent.change[,4] <-  rbind(((exp(model.4.org$coef[1])-1)*100),((exp(model.4.org$coef[4])-1)*100),
                              ((exp(model.4.org$coef[7])-1)*100),((exp(model.4.org$coef[10])-1)*100))

## Censored (Table 2, Model 4A)
percent.change[,5] <-  rbind(((exp(model.4.a$coef[1])-1)*100),((exp(model.4.a$coef[4])-1)*100),
                              ((exp(model.4.a$coef[7])-1)*100),((exp(model.4.a$coef[10])-1)*100))

## Censored (Table 2, Model 4B)
percent.change[,6] <-  rbind(((exp(model.4.b$coef[1])-1)*100),((exp(model.4.b$coef[4])-1)*100),
                              ((exp(model.4.b$coef[7])-1)*100),((exp(model.4.b$coef[10])-1)*100))

## This shows the matrix with the hazard ratios percent changes
percent.change
















############################################Table 4############################################
## This is the original Table 2 data with all four models

survival.formula <- function(dataset){
     dataset$risk0 <- dataset$risk1 - 1
     changes <- match(unique(dataset$cowcc), dataset$cowcc)
     dataset$risk0[changes] <- 0
     formula=Surv(dataset$risk0, dataset$risk1, dataset$acquire)
     return(formula)
}

##Gets the uncensored survival formula
uncensored=survival.formula(data)

## Table 2, model 1
model.1<-coxph(uncensored~nucass+cluster(cowcc), data, method="breslow")

## Table 2, model 2
model.2 <- coxph(uncensored~nucass+gdpcap+g2+industry+rivalry
                      +allies+polity+openness+chopen5+cluster(cowcc),
                       data, method="breslow")

## Table 2, model 3
model.3 <- coxph(uncensored~nucass+gdpcap+g2+industry+rivalry
                      +allies+polity+cluster(cowcc), data, method="breslow")

## Table 2, model 4
## Censored Hazard Model -- only those that have programs
data.program <- data[data$program!=0,]
censored <- survival.formula(data.program)

model.4 <- coxph(censored~nucass+gdpcap+g2+industry+rivalry
                                  +allies+polity+openness+chopen5
                                  +cluster(cowcc), data.program, method="breslow")


######################################## Table 5 #############################################
## This section attempts to replicate the author's Table 4.
## We have preserved the author's original code as much as possible.

## Importing data again to make sure that we have a totally clean dataset
In.Nucass <- read.dta("nucass_cinc.dta")
attach(In.Nucass)

## Create matrices containing the variables we wish to match and achieve balance on:

X <- cbind(gdpcap, g2, industry, rivalry, allies, polity, openness, chopen5)

BalanceMat <- cbind(gdpcap, g2, industry, rivalry, allies, polity, openness, chopen5)

## We have added the seed to be able to produce consistent results that may otherwise
## vary due to a random element in GenMatch that disgards controls that are tied
set.seed(12345)

## According to the R-help file, the GenMatch function "finds the optimal balance using 
## multivariate matching where a genetic search algorithm determines the weight each covariate 
## is given."

genout <- GenMatch(Tr=nucass, X=X, BalanceMatrix=BalanceMat, estimand="ATT",
                   M=1, pop.size=1000, max.generations=100, wait.generations=10,
                   int.seed=7, unif.seed=10, data.type.int=FALSE)

## Code run through here ##

## The output from the GenMatch function is fed into the Match function and specifies the weight 
## that the Match function should give to each covariate in order to achieve balance between the
## treatment and control groups.

mout <- Match(Y=acquire, Tr=nucass, X=X, estimand="ATT", Weight.matrix=genout, M=1)
summary(mout)

p.match <- rbind(In.Nucass[mout$index.treated,], In.Nucass[mout$index.control,])

#Saves the balance statistics in a stata dataset
write.dta(p.match,"nucass_post_match.dta")

# CHECK BALANCE:

mb <- MatchBalance ( nucass ~ (gdpcap + g2 + industry + rivalry + allies + polity + openness 
                     + chopen5), match.out=mout, nboots=0, ks=TRUE)

############################################ Table 6 ##########################################
## This section repeats the process used to generate table 4, except treatment observations for 
## which a control with close enough covariates is not found are dropped. We specify this 
## tolerance using the caliper functions.

mout.exact <- Match(Y=acquire, Tr=nucass, X=X, estimand="ATT",  caliper=1, 
                  Weight.matrix=genout, M=1)
summary(mout.exact)

p.match.exact<- rbind(In.Nucass[mout.exact$index.treated,], In.Nucass[mout.exact$index.control,])

#Saves the balance statistics in a stata dataset
write.dta(p.match.exact,"nucass_post_match_exact.dta")

# CHECK BALANCE:

mb.exact <- MatchBalance ( nucass ~ (gdpcap + g2 + industry + rivalry + allies + polity  
                          + openness + chopen5), match.out=mout.exact, nboots=0, ks=TRUE)

######################################## Table 7a ##############################################
## Importing post-match data as the author had it. This should be equal to p.match
post.match <- read.dta("nucass_post_match.dta")

## Data correction. See Table 2 code for why.

post.match$gdpcap <-post.match$gdpcap/1000
post.match$g2 <-post.match$g2/1000

uncensored.post.match=survival.formula(post.match)

## Hazard Model for Post-Matched Data with "exact" option NOT specified
model.2.pm <- coxph(uncensored.post.match~nucass+gdpcap+g2+industry+rivalry
                      +allies+polity+openness+chopen5+cluster(cowcc),
                       data=post.match, method="breslow")

summary(model.2.pm)

######################################## Table 7b ##############################################
## Importing post-match data with exact specified. This should be equal to p.match.exact
post.match.exact <- read.dta("nucass_post_match_exact.dta")

## Data correction. See Table 2 code for why.

post.match.exact$gdpcap <-post.match.exact$gdpcap/1000
post.match.exact$g2 <-post.match.exact$g2/1000

uncensored.post.match.exact=survival.formula(post.match.exact)

## Hazard Model for Post-Matched Data with "exact" option NOT specified
model.2.pme <- coxph(uncensored.post.match.exact~nucass+gdpcap+g2+industry+rivalry
                      +allies+polity+openness+chopen5+cluster(cowcc),
                       data=post.match.exact, method="breslow")

summary(model.2.pme)

